# Set Working Directory before starting
# Immcantation
suppressPackageStartupMessages(library(alakazam))
suppressPackageStartupMessages(library(shazam))
suppressPackageStartupMessages(library(tigger))
suppressPackageStartupMessages(library(dowser))
# scRepertoire
suppressPackageStartupMessages(library(scRepertoire))
suppressPackageStartupMessages(library(glmGamPoi))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(dittoSeq))
suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(ggpubr))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(cowplot))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(impactSingleCellToolkit))
# Input files
processed_objects_fh <- "objects/processed_objects.RData"
imm_heavy_results_fh <- "data/results_immcantation/BCR_CSFPB_heavy_germ-pass.tsv"
input_bcr_fh <- "data/input_immcantation/input_bcr_filtered_contig_annotations.csv"
input_tcr_fh <- "data/input_immcantation/input_tcr_filtered_contig_annotations.csv"
seed.number <- 123456
source("resources/functions_workshop.R")
load(processed_objects_fh)
# Add unique_cell_id column to seurat objects
bcell_obj$unique_cell_id <- row.names(bcell_obj@meta.data)
# Remove cells with no annotations (2 cells)
tcr_df <- tcr_df[! tcr_df$SUBTYPE_ANNOTATION %in% "No Annot",]
bcr_df <- bcr_df[! bcr_df$SUBTYPE_ANNOTATION %in% "No Annot",]
# Simplify V/J genes
tcr_df$v_call <- tstrsplit(tcr_df$v_call,"\\*")[[1]]
tcr_df$j_call <- tstrsplit(tcr_df$j_call,"\\*")[[1]]
bcr_df$v_call <- tstrsplit(bcr_df$v_call,"\\*")[[1]]
bcr_df$j_call <- tstrsplit(bcr_df$j_call,"\\*")[[1]]
# Add in chain columns
bcr_df$chain <- bcr_df$locus
tcr_df$chain <- tcr_df$locus
# For B cells, IgSeq analyses are done on the Heavy chain only
bcr_results_igh <- bcr_df[bcr_df$locus %in% "IGH",]
input_bcr_df <- read.csv(input_bcr_fh,stringsAsFactors = F)
# unique_cell_id
barcode_vec <- tstrsplit(input_bcr_df$barcode,"-")[[1]]
sample_vec <- tstrsplit(input_bcr_df$contig_id,":")[[1]]
input_bcr_df$unique_cell_id <- paste(barcode_vec,sample_vec,sep = "-")
# Overlap with Seurat Object
input_bcr_df <- input_bcr_df[input_bcr_df$unique_cell_id %in% colnames(bcell_obj),]
input_bcr_df <- input_bcr_df[input_bcr_df$contig_id %in% bcr_df$sequence_id,]
input_bcr_df$sample <- tstrsplit(input_bcr_df$unique_cell_id,"-")[[2]]
# Create scRepertoire dataframe list
bcr.list <- split(input_bcr_df,input_bcr_df$sample)
uni_samples <- unique(input_bcr_df$sample)
bcell_screp <- combineBCR(bcr.list,
samples = uni_samples)
# Load one contig file
input_tcr_df <- read.csv(input_tcr_fh,stringsAsFactors = F)
# unique_cell_id
barcode_vec <- tstrsplit(input_tcr_df$barcode,"-")[[1]]
sample_vec <- tstrsplit(input_tcr_df$contig_id,":")[[1]]
input_tcr_df$unique_cell_id <- paste(barcode_vec,sample_vec,sep = "-")
# Overlap with Seurat Object
input_tcr_df <- input_tcr_df[input_tcr_df$unique_cell_id %in% colnames(tcell_obj),]
input_tcr_df <- input_tcr_df[input_tcr_df$contig_id %in% tcr_df$sequence_id,]
input_tcr_df$sample <- tstrsplit(input_tcr_df$unique_cell_id,"-")[[2]]
# Create scRepertoire dataframe list
tcr.list <- split(input_tcr_df,input_tcr_df$sample)
uni_samples <- unique(input_tcr_df$sample)
tcell_screp <- combineTCR(tcr.list,
samples = uni_samples)
p_scr1 <- quantContig(bcell_screp,
cloneCall="strict",
chain = "IGH",
scale = FALSE)
p_scr2 <- quantContig(tcell_screp,
cloneCall="strict",
chain = "both",
scale = FALSE)
plot_grid(p_scr1,p_scr2,ncol = 2)
p_scr3 <- clonalHomeostasis(bcell_screp, cloneCall = "strict",chain = "IGH")
p_scr4 <- clonalHomeostasis(tcell_screp, cloneCall = "strict",chain = "both")
plot_grid(p_scr3,p_scr4,ncol = 2)
p_scr5 <- clonalProportion(bcell_screp, chain = "IGH",cloneCall = "strict")
p_scr6 <- clonalProportion(tcell_screp, chain = "both",cloneCall = "strict")
plot_grid(p_scr5,p_scr6,ncol = 2)
vizGenes(bcell_screp, gene = "V",
chain = "IGH",
plot = "bar",
scale = TRUE)
vizGenes(tcell_screp, gene = "V",
chain = "TRB",
plot = "bar",
scale = TRUE)
p_scr5 <- lengthContig(bcell_screp,
cloneCall="aa",
chain = "IGH")
p_scr6 <- lengthContig(tcell_screp,
cloneCall="aa",
chain = "TRB")
plot_grid(p_scr5,p_scr6,ncol = 2)
# BCR
clonalDiversity(bcell_screp,
cloneCall = "strict",
chain = "IGH",
group.by = "sample",
n.boots = 100)
# TCR
clonalDiversity(tcell_screp,
cloneCall = "aa",
chain = "both",
group.by = "sample",
n.boots = 100)
https://bitbucket.org/kleinstein/immcantation/src/master/training/10x_tutorial.md
dist_nearest <- distToNearest(bcr_results_igh)
# Automated Threshold - takes too long to run
# - Threshold setting occurs by default when running the immcantation pipeline
#threshold_output <- shazam::findThreshold(dist_nearest$dist_nearest,
# method = "gmm", model = "gamma-norm",
# cutoff = "user",spc = 0.99)
#threshold <- threshold_output@threshold
# generate Hamming distance histogram
p1 <- ggplot2::ggplot(subset(dist_nearest, !is.na(dist_nearest)),
aes(x = dist_nearest)) +
geom_histogram(color = "white", binwidth = 0.02) +
labs(x = "Hamming distance", y = "Count") +
scale_x_continuous(breaks = seq(0, 1, 0.1)) +
theme_bw() +
theme(axis.title = element_text(size = 18)) + geom_vline(xintercept=0.3, linetype = "longdash",color = "red")
plot(p1)
https://alakazam.readthedocs.io/en/stable/vignettes/Diversity-Vignette/
comp_colors <- c("CSF"="blue", "PB"="tomato2")
t_annot_colors <- c("CD4 T cells"="seagreen", "CD8 T cells"="steelblue")
b_annot_colors <- c("Memory B-cells"="seagreen", "naive B-cells"="steelblue","Plasma cells"="tomato2")
status_colors <- c("Healthy"="seagreen", "Disease"="steelblue")
b_status_colors <- c("Disease"="steelblue")
vfamily <- countGenes(tcr_results_csf, gene="v_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"),
clone="clone_id", mode="family")
jfamily <- countGenes(tcr_results_csf, gene="j_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"),
clone="clone_id", mode="family")
head(vfamily, n=4)
## # A tibble: 4 × 5
## # Groups: SUBTYPE_ANNOTATION, STATUS [3]
## SUBTYPE_ANNOTATION STATUS gene clone_count clone_freq
## <chr> <chr> <chr> <int> <dbl>
## 1 CD4 T cells Disease TRBV16 1 0.00115
## 2 CD4 T cells Disease TRBV21 1 0.00115
## 3 CD4 T cells Healthy TRBV21 1 0.00152
## 4 CD8 T cells Disease TRBV25 1 0.00870
bcr_results_igh_csf <- bcr_results_igh[bcr_results_igh$COMPARTMENT %in% "CSF",]
bcr_results_igh_pb <- bcr_results_igh[bcr_results_igh$COMPARTMENT %in% "PB",]
cfamily_csf <- countGenes(bcr_results_igh_csf, gene="c_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"),
clone="clone_id", mode="family")
cfamily_pb <- countGenes(bcr_results_igh_pb, gene="c_call", groups=c("SUBTYPE_ANNOTATION", "STATUS"),
clone="clone_id", mode="family")
## Warning in countGenes(bcr_results_igh_pb, gene = "c_call", groups =
## c("SUBTYPE_ANNOTATION", : NA(s) found in 1 row(s) of the c_call column and
## excluded from tabulation
head(cfamily_csf, n=4)
## # A tibble: 4 × 5
## # Groups: SUBTYPE_ANNOTATION, STATUS [2]
## SUBTYPE_ANNOTATION STATUS gene clone_count clone_freq
## <chr> <chr> <chr> <int> <dbl>
## 1 Memory B-cells Disease IGHD 1 0.0455
## 2 Memory B-cells Disease IGHG2 1 0.0455
## 3 Memory B-cells Healthy IGHG2 1 0.25
## 4 Memory B-cells Healthy IGHM 1 0.25
g1 <- ggplot(cfamily_csf, aes(x=gene, y=clone_freq)) +
theme_bw() +
ggtitle("Clonal Subtype B cells CSF") +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
ylab("Percent of repertoire") +
xlab("") +
scale_color_brewer(palette="Set1") +
geom_point(aes(color=STATUS), size=5, alpha=0.8) +
facet_grid(. ~ SUBTYPE_ANNOTATION)
g2 <- ggplot(cfamily_pb, aes(x=gene, y=clone_freq)) +
theme_bw() +
ggtitle("Clonal Subtype B cells PB") +
theme(axis.text.x=element_text(angle=45, hjust=1, vjust=1)) +
ylab("Percent of repertoire") +
xlab("") +
scale_color_brewer(palette="Set1") +
geom_point(aes(color=STATUS), size=5, alpha=0.8) +
facet_grid(. ~ SUBTYPE_ANNOTATION)
plot_grid(g1,g2,nrow=2)
Local species diversity (in this case, used for B & T cell populations)
Interpretation of X-axis diversity scores 0 : species richness (R) (general diversity index) 0->1 : Shannon Diversity => exponent of Shannon-Weiner index 1->2 : Simpson Diversity => inverse of the Simpson index 2->4 : inverse reciprocal of the largest clone, diversity of most abundant clone
comp_colors <- c("CSF"="blue", "PB"="tomato2")
comp_colors2 <- c("PB"="tomato2")
set.seed(seed.number)
bcr_results_igh_igm <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHM",]
bcr_results_igh_iga1 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHA1",]
bcr_results_igh_iga2 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHA2",]
bcr_results_igh_igg1 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG1",]
bcr_results_igh_igg2 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG2",]
bcr_results_igh_igg3 <- bcr_results_igh[bcr_results_igh$c_call %in% "IGHG3",]
MIN_OBS <- 2
NBOOT <- 200
a1_curve <- alphaDiversity(bcr_results_igh_igm, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a2_curve <- alphaDiversity(bcr_results_igh_iga1, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a3_curve <- alphaDiversity(bcr_results_igh_iga2, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
## Warning in estimateAbundance(data, ci = 0.95, ...): Not all groups passed
## threshold min_n=2. Excluded: CSF
a4_curve <- alphaDiversity(bcr_results_igh_igg1, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a5_curve <- alphaDiversity(bcr_results_igh_igg2, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a6_curve <- alphaDiversity(bcr_results_igh_igg3, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
table(bcr_results_igh$c_call)
##
## IGHA1 IGHA2 IGHD IGHG1 IGHG2 IGHG3 IGHM
## 39 18 20 45 12 8 448
a1<-plot(a1_curve,main_title="IgM alpha diversity",colors=comp_colors)
a2<-plot(a2_curve,main_title="IgA1 alpha diversity",colors=comp_colors2)
a3<-plot(a3_curve,main_title="IgA2 alpha diversity",colors=comp_colors2)
a4<-plot(a4_curve, main_title="IgG1 alpha diversity",colors=comp_colors)
a5<-plot(a5_curve, main_title="IgG2 alpha diversity",colors=comp_colors2)
a6<-plot(a6_curve, main_title="IgG3 alpha diversity",colors=comp_colors2)
#panel_ad <- plot_grid(a1,a2,a3,a4,a5,a6,nrow = 2)
panel_ad <- plot_grid(a1,a4,a3,nrow = 1)
panel_ad
comp_colors <- c("CSF"="blue", "PB"="tomato2")
comp_colors2 <- c("PB"="tomato2")
set.seed(seed.number)
tcr_results_d <- tcr_df[tcr_df$STATUS %in% "Disease",]
tcr_results_h <- tcr_df[tcr_df$STATUS %in% "Healthy",]
tcr_results_d_cd4 <- tcr_results_d[tcr_results_d$SUBTYPE_ANNOTATION %in% "CD4 T cells",]
tcr_results_d_cd8 <- tcr_results_d[tcr_results_d$SUBTYPE_ANNOTATION %in% "CD8 T cells",]
tcr_results_h_cd4 <- tcr_results_h[tcr_results_h$SUBTYPE_ANNOTATION %in% "CD4 T cells",]
tcr_results_h_cd8 <- tcr_results_h[tcr_results_h$SUBTYPE_ANNOTATION %in% "CD8 T cells",]
MIN_OBS <- 2
NBOOT <- 200
a7_curve <- alphaDiversity(tcr_results_d_cd4, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a8_curve <- alphaDiversity(tcr_results_d_cd8, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a9_curve <- alphaDiversity(tcr_results_h_cd4, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a10_curve <- alphaDiversity(tcr_results_h_cd8, group="COMPARTMENT",min_q=0, max_q=4, step_q=0.1,ci=0.95, nboot=NBOOT,min_n=MIN_OBS)
a7<-plot(a7_curve,main_title="CD4 Disease alpha diversity",colors=comp_colors)
a8<-plot(a8_curve,main_title="CD8 Disease alpha diversity",colors=comp_colors)
panel_tcr_ad <- plot_grid(a7,a8,nrow = 1)
panel_tcr_ad
a9<-plot(a9_curve,main_title="CD4 Healthy alpha diversity",colors=comp_colors)
a10<-plot(a10_curve, main_title="CD8 Healthy alpha diversity",colors=comp_colors)
panel_tcr_ad2 <- plot_grid(a9,a10,nrow = 1)
panel_tcr_ad2
set.seed(seed.number)
# Calculate combined R and S mutation frequencies
bcr_csf_obs <- observedMutations(bcr_results_igh_csf, sequenceColumn="sequence_alignment",
germlineColumn="germline_alignment_d_mask",
regionDefinition=NULL,
frequency=TRUE,
combine=TRUE,
nproc=4)
bcr_pb_obs <- observedMutations(bcr_results_igh_pb, sequenceColumn="sequence_alignment",
germlineColumn="germline_alignment_d_mask",
regionDefinition=NULL,
frequency=TRUE,
combine=TRUE,
nproc=4)
# CSF
bcr_csf_mut_freq_clone <- bcr_csf_obs %>%
dplyr::group_by(clone_id) %>%
dplyr::summarize(median_mut_freq = median(mu_freq))
bcr_pb_mut_freq_clone <- bcr_pb_obs %>%
dplyr::group_by(clone_id) %>%
dplyr::summarize(median_mut_freq = median(mu_freq))
p_mut_csf <- ggplot(bcr_csf_mut_freq_clone, aes(median_mut_freq)) +
geom_histogram(binwidth = 0.005) +
theme_bw() + theme(axis.title = element_text(size = 18)) + ggtitle("CSF - Median clonotype mutations")
p_mut_pb <- ggplot(bcr_pb_mut_freq_clone, aes(median_mut_freq)) +
geom_histogram(binwidth = 0.005) +
theme_bw() + theme(axis.title = element_text(size = 18)) + ggtitle("PB - Median clonotype mutations")
plot_grid(p_mut_pb,p_mut_csf,ncol = 2)
g1 <- ggplot(bcr_csf_obs, aes(x=c_call, y=mu_freq, fill=c_call)) +
theme_bw() + ggtitle("BCR CSF Total mutations") +
xlab("Isotype") + ylab("Mutation frequency") +
scale_fill_brewer(palette="OrRd") +
geom_boxplot() + stat_compare_means(label = "p.signif", aes(group=c_call), hide.ns = TRUE)
g2 <- ggplot(bcr_pb_obs, aes(x=c_call, y=mu_freq, fill=c_call)) +
theme_bw() + ggtitle("BCR PB Total mutations") +
xlab("Isotype") + ylab("Mutation frequency") +
scale_fill_brewer(palette="OrRd") +
geom_boxplot() + stat_compare_means(label = "p.signif", aes(group=c_call), hide.ns = TRUE)
grid.arrange(g1,g2,nrow=2)
set.seed(seed.number)
bcr_results_igh_d <- bcr_results_igh[bcr_results_igh$STATUS %in% "Disease",]
clone_db <- collapseClones(bcr_results_igh_d, nproc=1)
model_disease <- createTargetingModel(clone_db, model="rs", sequenceColumn="clonal_sequence", germlineColumn="clonal_germline")
## Warning in createMutabilityMatrix(db, sub_mat, model = model, sequenceColumn =
## sequenceColumn, : Insufficient number of mutations to infer some 5-mers. Filled
## with 0.
m1 <- plotMutability(model_disease, nucleotides="A", style="hedgehog")
m2 <- plotMutability(model_disease, nucleotides="C", style="hedgehog")
m3 <- plotMutability(model_disease, nucleotides="G", style="hedgehog")
m4 <- plotMutability(model_disease, nucleotides="T", style="hedgehog")
# No BCR clones of sufficient size for making trees with dowser
input_trees <- bcr_df
input_trees$subject_id <- tstrsplit(input_trees$sample,"_")[[1]]
clones <- formatClones(input_trees,minseq = 2, nproc = 4)
## Warning in processClones(clones, nproc = nproc, seq = seq_name, minseq =
## minseq): All clones have less than minseq = 2 sequences
print(clones)
## # A tibble: 0 × 2
## # Rowwise:
## # ℹ 2 variables: clone_id <chr>, data <list>
# Follow tutorial instead
# Load example data object
data(ExampleClones)
# Pick the two largest clonotypes
ExampleClones = ExampleClones[1:2,]
plots = plotTrees(ExampleClones)
plots = plotTrees(ExampleClones, tips="c_call")
plots[[1]]